\documentclass[t,12pt,aspectratio=169]{beamer} % 16:9 宽屏比例，适合现代投影
\usepackage{ctex} % 中文支持
\usepackage{amsmath, amssymb} % 数学公式与符号
\usepackage{graphicx}
\usepackage{url}
\usepackage{verbatim}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 插入代码
\usepackage{listings}
\usepackage{color}

% 设置列表的样式
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},   
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,         
    breaklines=true,                 
    captionpos=b,                    
    keepspaces=true,                 
    numbers=left,                    
    numbersep=5pt,                  
    showspaces=false,                
    showstringspaces=false,
    showtabs=false,                  
    tabsize=2
}

\lstset{style=mystyle}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% 主题设置（推荐简洁风格）
\usetheme{Madrid}
\usecolortheme{default} % 可选：seahorse, beaver, dolphin 等

\title{R语言统计入门第7章：方差分析和KW检验 }
\author{PD ET AL}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{目录 Analysis of variance and the Kruskal-Wallis test}

\begin{enumerate}
\item[7.1.] 单因素方差分析 One-way analysis of variance 
\item[7.2.] Kruskal-Wallis检验 Kruskal-Wallis test
\item[7.3.] 双因素方差分析 Two-way analysis of variance
\item[7.4.] Friedman检验 The Friedman test 
\item[7.5.] 回归分析中的方差分析表 The ANOVA table in regression analysis 
\item[7.6.] 书中习题 Exercises
\item[7.7.] 单项选择题
\item[7.8.] 简答题

\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{课程讲解重点难点 }

\begin{enumerate}

%\item  学习目标：组内方差、组间方差与全局方差，方差分析的思想。自由度的计算，anova函数的使用方法，理解方差分析表，对属性向量的回归分析，成对比较和多重比较，pairwise.t.test函数，分组数据的条形图，叠加均值与标准误，Bartlett检验，Kruskal-Wallis检验。双因素，行间方差，列间方差，多变量的回归分析，friedman.test函数。

\item 方差分析的基本理论知识，方差分析表。
\item 单因素方差分析：参数法、Kruskal-Wallis 检验。%数据：\verb+red.cell.folate+
\item 双因素方差分析：参数法、Friedman 检验。%数据：\verb+heart.rate+


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.1. 分组数据的组内均值与全局均值 }

\begin{itemize}
\item  {\color{red}问题：用表格描述分组数据，并计算组内均值和全局均值。}
\item 解答：注意每组的数据长度可能不一样。

\begin{table}[ht]
\begin{tabular}{|c|ccccc|c|} \hline
组别 & 数据 &&&&& 组内均值 \\ \hline 
第$1$组 & $x_{11}$ & $x_{12}$ & $x_{13}$ & $\cdots$ &  $x_{1n_1}$ & $\bar{x}_1$ \\ \hline  
第$2$组 & $x_{21}$ & $x_{22}$ & $x_{23}$ & $\cdots$ &  $x_{2n_2}$ & $\bar{x}_2$ \\ \hline
 $\cdots$ & $\cdots$ & $\cdots$ & $\cdots$ & $\cdots$ &   $\cdots$ & $\cdots$ \\ \hline
第$m$组 & $x_{m1}$ & $x_{m2}$ & $x_{m3}$ & $\cdots$ &  $x_{mn_m}$ & $\bar{x}_m$ \\ \hline  
\end{tabular}
\end{table}

全局均值是所有数据的均值，即下式，其中 $N=n_1+n_2+\cdots+n_m$.
\[ \bar{x}_. = \frac{1}{N} \sum\limits_{i=1}^{m} \sum\limits_{j=1}^{n_i} x_{ij}
=\frac{1}{N} \sum\limits_{i=1}^{m} n_i \bar{x}_i . \]

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.2. 单因素方差分析的基本问题和解决思路 }

\begin{itemize}
\item  {\color{red}问题：如何判断不同小组的均值是否有显著差异？}

\item 解答：研究{\color{blue}观测值与全局均值的差}，其平方和称为全局方差。

\begin{enumerate}
\item 将这个差写成{\color{blue}观测值与组内均值的差}与{\color{blue}组内均值与全局均值的差}的和，
\[ x_{ij}  = \bar{x}_. + (\bar{x}_i - \bar{x}_.) + (x_{ij}-\bar{x}_i) \]

\item 得到一个理论模型，其中假设误差项是正态分布的，
\[X_{ij} = \mu + \alpha_i + \varepsilon_{ij}, \,\, \varepsilon_{ij} \sim N(0,\sigma^2). \]

\item 将问题写成假设检验的形式，（零假设：小组之间无显著差异）
\[ H_0: \alpha_1=\alpha_2=\cdots=\alpha_m=0, \textrm{ v.s. } H_1: \exists i, \alpha_i\neq 0. \]

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.3. 组内方差与组间方差 }

\begin{itemize}
\item  {\color{red}问题：证明全局方差等于组内方差与组间方差的和，即
\[ SSD_{total} = SSD_{within} + SSD_{between}. \]
}

\vspace{-0.3cm}

\item 解答：写出这三个方差的定义，展开计算即得。
\begin{eqnarray*}
SSD_{total} &=& \sum\limits_{i=1}^{m} \sum\limits_{j=1}^{n_i} (x_{ij} - \bar{x}_.)^2 \\ 
SSD_{within} &=& \sum\limits_{i=1}^{m} \sum\limits_{j=1}^{n_i} (x_{ij} - \bar{x}_i)^2 \\
SSD_{between} &=& \sum\limits_{i=1}^{m} \sum\limits_{j=1}^{n_i} (\bar{x}_i - \bar{x}_.)^2 = \sum\limits_{i=1}^{m} n_i (\bar{x}_i - \bar{x}_.)^2
\end{eqnarray*}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.4. 方差分析的检验统计量 }

\begin{itemize}
\item  {\color{red}问题：方差分析的基本思路是什么？检验统计量是什么？}

\item  解答：

\begin{enumerate}

\item  比较组间方差与组内方差，来判断不同小组的均值是否有显著差异。

\item  检验统计量是平均组间方差与平均组内方差的商，
\[ {\color{blue} \boxed{F = \frac{MS_{between}}{MS_{within}} = \frac{SSD_{between}/(m-1)}{SSD_{within}/(N-m)} }. } \]

\item The {\color{blue}total variation} is split into a term describing {\color{blue}differences between group means} and a term describing {\color{blue}differences between individual measurements within the groups}. 

\item The grouping explains part of the total variation, and an informative grouping will explain a large part of the variation.

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.5. 单因素方差分析表 }

\begin{itemize}
\item  {\color{red}问题：什么是单因素方差分析表？}

\item  解答：

\begin{table}[ht]
\begin{tabular}{c|ccccc} \hline
来源 & 自由度 & 平方和 & 均方 & F 值 & p值 \\ \hline 
因素 & $m-1$  & $SSD_{between}$ & $MS_{between}$ & $F$ & $p$ \\ \hline 
误差 & $N-m$ & $SSD_{within}$ & $MS_{within}$ && \\ \hline 
总和 & $N-1$ & $SSD_{total}$ &&& \\ \hline 
\end{tabular}
\end{table}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.6. 检验统计量的密度函数与$p$值}

\begin{itemize}
\item  {\color{red}问题：画出自由度为 $(5,12)$ 的 F 分布的密度函数的图像。}

\item  解答：

\begin{center}
\includegraphics[height=0.7\textheight, width=0.9\textwidth]{plot-7-1-0-6.png}
\end{center}

%x <- seq(0,5,0.05)  #横坐标等差数列
%y <- df(x,5,12)  #密度函数的值
%plot(x,y,type='l',ylab='y = df(x,5,12)',ylim=c(0,0.8))  #画出密度函数的图像
%x1 <- x[x>=2.5]  # F统计值及其右边（阴影部分的下界）
%y1 <- y[x>=2.5]  # 阴影部分的上界
%polygon(c(x1,tail(x1,1),head(x1,1)),c(y1,0,0),col='blue')
%abline(h=0)  #横坐标轴
%abline(v=0)  #纵坐标轴


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.7. 一个简单例子 }

\begin{itemize}
\item  {\color{red}问题：考虑下述分组数据，计算组内平方和和组间平方和，判断不同小组的均值是否有显著差异。

\begin{table}[ht]
\begin{tabular}{|c|p{1cm}p{1cm}p{1cm}p{1cm}p{1cm}|} \hline
组别 &数据&&&& \\ \hline 
第$1$组 & $1$ & $2$ & $3$  &   &  \\ \hline  
第$2$组 & $4$ & $5$ & $6$  & 7  &  \\ \hline 
第$3$组 & $8$ & $9$ & $10$  & 11  & 12 \\ \hline 
\end{tabular}
\end{table}
}

\item 解答：组内平方和为 $17$, 组间平方和为 $126$. $F$ 统计值为 $33.35$, $p$值为 $0.00006887$. 因此这三组数据的均值有显著差异。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.8. 茆诗松书本例子8.1.1. }

\begin{itemize}
\item  {\color{red}问题：在饲料养鸡增肥的研究中，某研究所提出三种饲料配方：A1是以鱼粉为主的饲料，A2是以槐米粉为主的饲料，A3是以苜蓿粉为主的饲料。为比较三种饲料的效果，特选 24 只相似的雏鸡随机均分为三组，每组各喂一种饲料，60 天后观察它们的质量。试验结果如表所示。

{\footnotesize
\begin{table}[ht]
\begin{tabular}{c|cccccccc} \hline
饲料 &质量&&&&&&& \\ \hline 
A1 & 1073& 1009& 1060& 1001& 1002& 1012& 1009& 1028  \\ \hline  
A2 & 1107& 1092&  990& 1109& 1090& 1074& 1122& 1001 \\ \hline 
A3 & 1093& 1029& 1080& 1021& 1022& 1032& 1029& 1048 \\ \hline 
\end{tabular}
\end{table}
}
}

\item 解答：方差分析的 p 值为 $0.0454$, 小于 $\alpha=0.05$，故拒绝原假设，认为不同饲料对鸡的增肥作用有明显的差别。

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.9. 茆诗松书本例子8.1.1. }

\begin{itemize}
\item  {\color{red}问题：计算上述数据的方差分析表。}

\item 解答：先把数据放在一个数据框里，然后调用lm函数和anova函数。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> x1 = c(1073, 1009, 1060, 1001, 1002, 1012, 1009, 1028)
> x2 = c(1107, 1092,  990, 1109, 1090, 1074, 1122, 1001)
> x3 = c(1093, 1029, 1080, 1021, 1022, 1032, 1029, 1048)
> x = c(x1,x2,x3)
> A = gl(3,8,24); levels(A)=c('A1','A2','A3')
> mydata = data.frame(x,A)
> anova(lm(x~A,data=mydata))
Analysis of Variance Table

Response: x
          Df  Sum Sq Mean Sq F value  Pr(>F)  
A          2  9660.1  4830.0  3.5948 0.04543 *
Residuals 21 28215.9  1343.6                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.10. 红细胞叶酸盐数据 }

\begin{itemize}
\item  {\color{red}问题：载入数据框 \verb+red.cell.folate+, 解释每列数据的含义。}

\item 解答：体会因子型变量是如何存储分组数据的。
\begin{itemize}
\item  \,{\color{blue}\verb+folate+}: a numeric vector, folate concentration (mg/l). 
\item  \,{\color{blue}\verb+ventilation+}: a factor with three levels.  
	\begin{itemize}
	\item N2O+O2, 24h: 50\% nitrous oxide and 50\% oxygen, continuously for 24 hours; 
	\item N2O+O2, op: 50\% nitrous oxide and 50\% oxygen, only during operation; 
	\item O2, 24h: no nitrous oxide but 35\%–50\% oxygen for 24 hours.
	\end{itemize}
\end{itemize}
\item  词汇表：

\begin{center}
\begin{tabular}{|ll|ll|} \hline 
folate & 叶酸 & concentration & 浓度 \\ \hline 
ventilation & 通风 & nitrous oxide & 一氧化二氮 \\ \hline 
oxygen & 氧气 & operation & 手术 \\ \hline 
\end{tabular}
\end{center}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.11. 红细胞叶酸盐数据的方差分析表(程序与结果)}

\begin{itemize}
\item  {\color{red}问题：用方差分析，推断不同的通风条件是否影响叶酸浓度。}

\item 解答：使用 \,{\color{blue}\verb+anova+} 函数，注意输入参数是一个线性回归模型。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> library(ISwR)
> attach(red.cell.folate)
> lm01 <- lm(folate~ventilation)
> summary(lm01)
> anova(lm01)
Analysis of Variance Table

Response: folate
            Df Sum Sq Mean Sq F value  Pr(>F)  
ventilation  2  15516  7757.9  3.7113 0.04359 *
Residuals   19  39716  2090.3                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.12. 红细胞叶酸盐数据的方差分析表(结果解读)}

\begin{itemize}
\item  {\color{red}如何解读这个方差分析表？}

\item 解答：

\begin{enumerate}
\item $p$ 值较小，说明不同通风条件对红细胞里叶酸盐的浓度有影响。
\item 因子 {\color{blue}\texttt{ventilation}} 有 $p=3$ 个水平，故自由度为 $p-1=2$.
\item 残差的自由度是 $n-p=22-3=19$. 
\item 组间差异平方和是 15516, 组内差异平方和是 39716. 
\item 各自除以自由度，得到平均组间差异平方和和平均组内差异平方和。 
\item 相除得到 $F$ 统计量的值，$F=MS_{between}/MS_{within}=3.7113$.
\item 这个 $F$ 统计量服从 $F(2,19)$ 分布，由此求出 $p$ 值等于 0.04359. \\ 
		例如使用 {\color{blue}\texttt{> 1-pf(3.7113,2,19)}}.
\item 由于 $p$ 值小于 0.05, 说明分组较大地解释了因变量的方差。
\end{enumerate}
    
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.13. $F(2,19)$ 分布的密度函数与$p$值}

\begin{center}
\includegraphics[height=0.7\textheight, width=0.9\textwidth]{plot-7-1-0-8.png}
\end{center}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.14.  }

\begin{itemize}
\item  {\color{red}问题：如何画出F分布的概率密度函数与p值的示意图？}

\item 解答：

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> x <- seq(0,5,0.1)  #横坐标等差数列
> y <- df(x,2,19)  #密度函数的值
> plot(x,y,type='l')  #画出密度函数的图像
> x1 <- x[x>=3.7113]  # F统计值及其右边（阴影部分的下界）
> y1 <- y[x>=3.7113]  # 阴影部分的上界
> polygon(c(x1,tail(x1,1),head(x1,1)),c(y1,0,0),col='blue')
> abline(h=0)  #横坐标轴
> abline(v=0)  #纵坐标轴
\end{lstlisting}

\item 注：\\
{\color{blue}\texttt{tail(x1,1)}}是向量 \,{\color{blue}\texttt{x1}} 的最后一个分量，\\ 
{\color{blue}\texttt{head(x1,1)}}是向量 \,{\color{blue}\texttt{x1}}的第一个分量。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.15. 青春期阶段对生长因子的影响的方差分析(错误) }

\begin{itemize}
\item  {\color{red}问题：为研究青春期不同阶段的生长因子的差异，进行下述方差分析。其中有什么错误？}

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> #library(ISwR)
> head(juul); summary(juul); attach(juul)
> anova(lm(igf1~tanner))
Analysis of Variance Table

Response: igf1
           Df   Sum Sq  Mean Sq F value    Pr(>F)    
tanner      1 10985605 10985605  686.07 < 2.2e-16 ***
Residuals 790 12649728    16012                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
\end{lstlisting}

\item 解答：\,{\color{blue}\verb+tanner+} 还是数值型，不是因子型。因此这里做的不是分组数据的方差分析。

%who tells the tale? 
%\hfill (Pirate slang: 'Dead man tells no tale.')

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.16.  }

\begin{itemize}
\item  {\color{red}问题：残差自由度 790 是怎么得出来的？}

\item 解答：这是所研究的两个变量的完整数据的长度，减去2得到的。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> mydata01=juul[c('igf1','tanner')]
> cc=complete.cases(mydata01)
> mydata02=mydata01[cc,]
> nrow(mydata02)
[1] 792
\end{lstlisting}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.17. 青春期阶段对生长因子的影响的方差分析(正确) }

\begin{itemize}
\item  {\color{red}问题：将青春期阶段设成因子型数据，对生长因子做方差分析。}

\item 解答：结果表明 $p$ 值约等于零，说明有很大影响。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> #library(ISwR)
> #attach(juul)
> tanner <- factor(tanner,labels=c('i','ii','iii','iv','v'))
> lm01 <- lm(igf1~tanner)
> summary(lm01)
> anova(lm01)
Analysis of Variance Table

Response: igf1
           Df   Sum Sq Mean Sq F value    Pr(>F)    
tanner      4 12696217 3174054  228.35 < 2.2e-16 ***
Residuals 787 10939116   13900                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
\end{lstlisting}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.18.  }

\begin{itemize}

\item  {\color{red}如何理解自变量是分组数据时候的回归模型的结果？}

\item 解答：截距项系数是第一组的 \,{\color{blue}\texttt{igf1}} 数据的均值。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> tanner <- factor(tanner,labels=c('i','ii','iii','iv','v'))
> lm01 <- lm(igf1~tanner)
> summary(lm01)

Call:
lm(formula = igf1 ~ tanner)

Residuals:
    Min      1Q  Median      3Q     Max 
-365.33  -71.37  -10.24   61.53  448.67 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.0.19. 回归模型 }

\begin{itemize}

\item  继续上一页的程序输出：

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  207.473      6.685  31.034   <2e-16 ***
tannerii     145.199     15.597   9.309   <2e-16 ***
tanneriii    275.750     18.804  14.665   <2e-16 ***
tanneriv     305.545     16.863  18.120   <2e-16 ***
tannerv      257.862      9.477  27.208   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 117.9 on 787 degrees of freedom
  (547 observations deleted due to missingness)
Multiple R-squared:  0.5372,	Adjusted R-squared:  0.5348 
F-statistic: 228.4 on 4 and 787 DF,  p-value: < 2.2e-16

\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.1.1. 红细胞叶酸盐的回归模型(与基准组的比较) }

\begin{itemize}
\item  {\color{red}问题：找出不同通风条件下，叶酸盐的浓度的差异。}

\item 解答：从回归模型的 \,{\color{blue}\texttt{summary}} 可以看出，
作为基准组的第一组的均值显著不等于零，第一组和第二组有较大差异，和第三组的差异不明显。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm01 <- lm(folate~ventilation)
> summary(lm01)
Call:
lm(formula = folate ~ ventilation)
Residuals:
    Min      1Q  Median      3Q     Max 
-73.625 -35.361  -4.444  35.625  75.375 
Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)            316.62      16.16  19.588 4.65e-14 ***
ventilationN2O+O2,op   -60.18      22.22  -2.709   0.0139 *  
ventilationO2,24h      -38.62      26.06  -1.482   0.1548    
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.1.2. 红细胞叶酸盐的回归模型(成对比较) }

\begin{itemize}
\item  {\color{red}问题：如何比较所有组两两之间的差异？}

\item 解答：使用  \,{\color{blue}\verb+pairwise.t.test()+} 函数。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> pairwise.t.test(folate,ventilation)
> pairwise.t.test(folate,ventilation,p.adj='bonferroni')

          N2O+O2,24h N2O+O2,op
N2O+O2,op 0.042      -        
O2,24h    0.464      1.000    
\end{lstlisting}

\item 解读：这是三组的均值两两比较的 $p$ 值。从中看出 \,{\color{blue}\texttt{N2O+O2,24h}} 与 \,{\color{blue}\texttt{N2O+O2,op}} 这两组的均值有较大的差异。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.2. 放宽对方差的假设  }

\begin{itemize}
\item  {\color{red}问题：方差分析需要假设各组的方差相等。如何放宽这个假设？}

\item 解答：The traditional one-way ANOVA requires an assumption of equal variances for all groups. There is, however, an alternative procedure that does not require that assumption. It is due to Welch and similar to the unequal-variances t test. 
This has been implemented in the \,{\color{blue}\texttt{oneway.test}} function. 
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> oneway.test(folate ~ ventilation)
\end{lstlisting}


\item 解答：It is also possible to perform the pairwise \,{\color{blue}\text{t}} tests so that they do not use a common pooled standard deviation. %This is controlled by the argument \,{\color{blue}\verb+pool.sd+}.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> pairwise.t.test(folate, ventilation, pool.sd = F)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.3.1. 红细胞叶酸盐的分组数据的条形图(程序) }

\begin{itemize}
\item  {\color{red}问题：将 \texttt{red.cell.folate} 的分组数据用条形图画出来，再叠加每组数据的均值和标准误差。}

\item 解答：先计算每组的均值、方差、和标准误差。再画出条形图、一个标准误差的区间、以及三个均值点之间的连线。
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> xbar <- tapply(folate,ventilation,mean)
> s <- tapply(folate,ventilation,sd)
> n <- tapply(folate,ventilation,length)
> sem <- s/sqrt(n)
> stripchart(folate~ventilation,pch=16,vert=T)
> arrows(1:3,xbar+sem,1:3,xbar-sem,angle=75,code=3)
> lines(1:3,xbar,pch=1,type='b',cex=2)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.3.2. 红细胞叶酸盐的分组数据的条形图(图像) }

\begin{figure}
\centering
\includegraphics[height=0.6\textheight, width=0.7\textwidth]{plot-7-1-3-1.png}
\caption{red.cell.folate data with $\bar{x}\pm 1\textrm{SEM}$ }
\end{figure}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.1.4. Bartlett's Test}

\begin{itemize}
\item  {\color{red}问题：如何检验按 {\texttt{ventilation}} 分组的 {\texttt{folate}} 数据是否有相同方差？}

\item 解答：使用 \,{\color{blue}\texttt{bartlett.test}} 函数。Description: Performs Bartlett's test of the null that the variances in each of the groups (samples) are the same.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> bartlett.test(folate~ventilation)

	Bartlett test of homogeneity of variances

data:  folate by ventilation
Bartlett K-squared = 2.0951, df = 2, p-value = 0.3508
\end{lstlisting}

\item 解读结果：$p$ 值不接近零，所以接受方差相等的假设。
\item 词汇表：

\begin{center}
\begin{tabular}{|cc|cc|cc|} \hline 
homogeneity & 齐性 & variance & 方差 & statistic &统计量 \\ \hline 
\end{tabular}
\end{center}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.2.1. Kruskal-Wallis test }

\begin{itemize}
\item  {\color{red}问题：KW检验是方差分析的非参数版本。它的统计量是怎么构造的？}

\item 解答：Data are replaced with their ranks without regard to the grouping, and the test is based on the between-group sum of squares calculated from the average ranks.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> kruskal.test(folate~ventilation)

	Kruskal-Wallis rank sum test

data:  folate by ventilation
Kruskal-Wallis chi-squared = 4.1852, df = 2, p-value = 0.1234
\end{lstlisting}


\item Description: Performs a Kruskal-Wallis rank sum test of {\bf \color{blue}the null} that the location parameters of the distribution of x are the same in each group (sample). {\bf\color{blue}The alternative} is that they differ in at least one.


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.2.2. Kruskal-Wallis test statistic}

\begin{itemize}

\item {\color{red}问题：写出 KW 统计量的计算公式，并验证上一页的结果。}

\item  解答：计算公式如下，其中 $N$ 为总样本量，$n_i$ 为第 $i$ 组的样本量，$R_i$ 为第 $i$ 组的平均秩，$R=(N+1)/2$ 为全部秩的平均值。
\[ KW = \frac{12}{N(N+1)} \sum\limits_{i=1}^{m} n_i(R_i-R)^2. \]

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(red.cell.folate)
> rof <- rank(folate)
> rofbar <- tapply(rof,ventilation,mean)
> ni <- tapply(rof,ventilation,length)
> kw <- sum(ni*(rofbar-mean(1:22))^2)*12/22/23
> kw
[1] 4.185244
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.2.3. 计算第一组的秩 }

\begin{itemize}
\item {\color{red}问题：列出第一组数据在所有数据中的秩。} 

\item 解答：$R_1$ 等于下表中8个秩的平均值，$R$ 等于1:22的平均值。

{\footnotesize 
\begin{table}[ht]
\centering
%\caption{第一组的秩}
\begin{tabular}{|c|c|c|c|}\hline 
序号 & folate & ventilation & rank(folate) \\ \hline 
1&     243&  N2O+O2,24h   &5 \\ \hline 
2&     251&  N2O+O2,24h   &7 \\ \hline 
3&     275&  N2O+O2,24h  &12 \\ \hline 
4&     291&  N2O+O2,24h  &14 \\ \hline 
5&     347&  N2O+O2,24h  &19 \\ \hline 
6&     354&  N2O+O2,24h  &20 \\ \hline 
7&     380&  N2O+O2,24h  &21 \\ \hline 
8&     392&  N2O+O2,24h  &22 \\ \hline 
\end{tabular}
\end{table}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.1. 双因素方差分析的数据与均值}

\begin{itemize}

\item  {\color{red}问题：用表格描述数据，并计算行均值、列均值、和总的均值。}

\item 解答：这里设每行数据的个数都一样。

\begin{table}[ht]
\begin{tabular}{|c|c|c|c|c|c|c|} \hline
数据 & $j=1$ & $j=2$ & $j=3$ & $\cdots$ & $j=n$ & 行均值 \\ \hline 
$i=1$ & $x_{11}$ & $x_{12}$ & $x_{13}$ & $\cdots$ &  $x_{1n}$ & $\bar{x}_{1\cdot}$ \\ \hline  
$i=2$ & $x_{21}$ & $x_{22}$ & $x_{23}$ & $\cdots$ &  $x_{2n}$ & $\bar{x}_{2\cdot}$ \\ \hline
 $\cdots$ & $\cdots$ & $\cdots$ & $\cdots$ & $\cdots$ &   $\cdots$ & $\cdots$ \\ \hline
$i=m$ & $x_{m1}$ & $x_{m2}$ & $x_{m3}$ & $\cdots$ &  $x_{mn}$ & $\bar{x}_{m\cdot}$ \\ \hline  
列均值 & $\bar{x}_{\cdot 1}$ & $\bar{x}_{\cdot 2}$ & $\bar{x}_{\cdot 3}$ & $\cdots$ &  $\bar{x}_{\cdot n}$ & $\bar{x}_{\cdot\cdot}$ \\ \hline  
\end{tabular}
\end{table}

{\small
\begin{eqnarray*}
\textrm{行均值: } \bar{x}_{i\cdot} = \frac{1}{n} \sum\limits_{j=1}^{n} x_{ij}, \hspace{0.2cm} 
\textrm{列均值: } \bar{x}_{\cdot j} = \frac{1}{m} \sum\limits_{i=1}^{m} x_{ij}, \hspace{0.2cm} 
\textrm{均值: } \bar{x}_{\cdot \cdot} = \frac{1}{mn} \sum\limits_{i=1}^{m}\sum\limits_{j=1}^{n} x_{ij}. 
\end{eqnarray*}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.2. 双因素方差分析的各方差}

\begin{itemize}
\item {\color{red}问题：计算行间方差、列间方差、残差方差、总方差。}

\item 解答：方差是数据偏离其平均值的程度的一种衡量。
{\small
\begin{eqnarray*}
\text{行间方差：} SSD_{row} &=& n\sum\limits_{i=1}^{m} (\bar{x}_{i\cdot} - \bar{x}_{\cdot\cdot})^2,\,\, \textrm{自由度：}m-1,\\
\text{列间方差：} SSD_{column} &=& m\sum\limits_{j=1}^{n} (\bar{x}_{\cdot j} - \bar{x}_{\cdot\cdot})^2,\,\, \textrm{自由度：}n-1, \\
\text{残差方差：} SSD_{residual} &=& \sum\limits_{i=1}^{m}\sum\limits_{j=1}^{n} (x_{ij}-\bar{x}_{i\cdot} - \bar{x}_{\cdot j}+ \bar{x}_{\cdot\cdot})^2, \,\, \textrm{自由度：}(m-1)(n-1),\\
\text{总方差：} SSD_{total} &=& \sum\limits_{i=1}^{m}\sum\limits_{j=1}^{n} (x_{ij} - \bar{x}_{\cdot\cdot})^2,\,\, \textrm{自由度：}mn-1.
\end{eqnarray*}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.3. 双因素方差分析的统计模型}

\begin{itemize}

\item  {\color{red}问题：解释下述统计模型的含义：
\[ X_{ij} = \mu + \alpha_i + \beta_j + \varepsilon_{ij}, \,\, \varepsilon_{ij}\sim N(0,\sigma^2). \]
}

\item 解答：各部分的含义如下，其中前三个是参数，
\begin{enumerate}
\item  $\mu$ 是总体均值，
\item  $\alpha_i$ 是行效应，要求：行效应总和等于零，
\item  $\beta_j$ 是列效应，要求：列效应总和也等于零，
\item  $\varepsilon_{ij}$ 是未能解释部分，假设服从方差为 $\sigma^2$ 的正态分布。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.4. 双因素方差分析的统计模型}

\begin{itemize}

\item  {\color{red}问题：解释双因素方差分析的思路。}

\item 解答：

\begin{enumerate}

\item  将平均行间(列间)方差与平均残差方差比较，写出统计量。例如
\[ F=\frac{SSD_{row}/(m-1)}{SSD_{residual}/((m-1)(n-1))}. \]

\item  如果行效应显著，那么上述 F 统计量的值会很大，落入拒绝域。

\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.5. 心率数据的变量的含义与类型 }

\begin{itemize}
\item  {\color{red}问题：载入 \verb+heart.rate+ 数据，写出三个变量的含义与数据类型。}

\item 解答：可以使用  \,{\color{blue}\verb+str()+} 函数查看数据。\\
 \,{\color{blue}\verb+str()+} compactly display the structure of an arbitrary R object.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(heart.rate)
> head(heart.rate)
> str(hr)
> str(subj)
> str(time)
\end{lstlisting}

\item 解答：
\begin{enumerate}
\item  变量 \,{\color{blue}\texttt{hr}} 是心率，每分钟的心跳数，数值型； 
\item  变量 \,{\color{blue}\texttt{subj}} 是病人编号，因子型，水平为 1-9，共9位病人；
\item  变量 \,{\color{blue}\texttt{time}} 是时间，因子型，水平为 0, 30, 60, 120, 单位：分钟之后。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.6. 心率数据的双因素方差分析}

\begin{itemize}
\item  {\color{red}问题：用因素 {\texttt{subj}} 和因素 {\texttt{time}} 对 {\texttt{hr}} 进行方差分析，解释结果。}

\item 解答：注意 \,{\color{blue}\texttt{anova()}} 的输入是一个回归模型，即 \,{\color{blue}\texttt{lm()}} 的输出。
并注意模型公式 \,{\color{blue}\texttt{hr $\sim$ subj + time}} 的简洁使用。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> anova(lm(hr~subj+time))
Analysis of Variance Table

Response: hr
          Df Sum Sq Mean Sq F value    Pr(>F)    
subj       8 8966.6 1120.82 90.6391 4.863e-16 ***
time       3  151.0   50.32  4.0696   0.01802 *  
Residuals 24  296.8   12.37                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1                     
\end{lstlisting}

\item 回答：不同病人的心率有很大不同；测试时间是在多少分钟之后，对心率也有较大影响。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.0.7. 为平衡的试验设计生成带模式的因子型数据}

\begin{itemize}
\item {\color{red}问题：如何用  {\texttt{gl()}} 函数生成因子型数据？}

\item 解答： \,{\color{blue}\texttt{gl()}} 函数的三个参数分别是水平的数目、每块长度与数据的总长度，第四个参数是水平的名称。gl 是指 generate levels, 生成不同的水平。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> my.hr <- c(96,110,89,95,128,100,72,79,100,92,106,
+             86,78,124,98,68,75,106,86,108,85,78,118,100,
+             67,74,104,92,114,83,83,118,94,71,74,102)
> my.subj <- gl(9,1,36)
> my.time <- gl(4,9,36,labels=c(0,30,60,120))
> my.heart.rate <- data.frame(my.hr, my.subj, my.time)
\end{lstlisting}

\item 先将各变量的数据保存好，再用 \,{\color{blue}\texttt{data.frame()}} 函数将各变量组成一个数据框。

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.1.1. 重复试验的图像 }

\begin{itemize}
\item {\color{red}问题：什么是意大利面图 spaghettigram ？}

\item 解答：It is a plot where data from the same subject are connected with lines.
使用 \,{\color{blue}\texttt{interaction.plot()}} 函数，可以画出意大利面图。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(heart.rate)
> interaction.plot(time,subj,hr)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.3.1.2.  }

\begin{center}
\includegraphics[height=0.7\textheight, width=0.7\textwidth]{plot-7-3-1.png}
\end{center}


\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.4.1. The Friedman test}

\begin{itemize}
\item {\color{red}问题： Friedman 检验是双因素方差分析的一种非参数方法。写出这个检验的思路和统计量。}

\item 思路：Friedman's test is based on {\color{blue}ranking observations within each row} assuming that if there is no column effect then all orderings should be equally likely. A test statistic based on {\color{blue}the column sum of squares} can be calculated and normalized to give a $\chi^2$-distributed test statistic.


\item 构造统计量：
%这是一个很好的作业。要么自己创造发挥，要么查阅文献寻找答案。
可以跟Kruskal-Wallis检验进行类比。

\item 计算$p$值：这等价于研究上述构造的这个统计量的分布。


\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.4.2. 心率数据的双因素方差分析(Friedman检验)}

\begin{itemize}
\item  {\color{red}问题：用Friedman检验对心率数据进行方差分析。}

\item 解答：以 \texttt{subj} 为区组，检验 \texttt{time} 的效应。看到 $p$ 值较小，说明测量时间对心率有影响。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> friedman.test(hr~time|subj, data=heart.rate)

	Friedman rank sum test

data:  hr and time and subj
Friedman chi-squared = 8.5059, df = 3, p-value = 0.03664
\end{lstlisting}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.5.1. The ANOVA table in regression analysis }

\begin{itemize}
\item  {\color{red}问题：在一元线性回归模型中，也可以进行方差分析，说明其思路。
\[ y = \beta_0 + \beta_1 x + \varepsilon, \,\,\, \varepsilon \sim N(0,\sigma^2).\]
}

\vspace{-0.5cm}

\item 解答：
\begin{enumerate}
\item  记因变量的观测值 $y_i$, 平均值 $\bar{y}$, 回归值 $\hat{y}_i=\hat{\beta}_0+\hat{\beta}_1x_i$. 

\item  计算模型方差与残差方差，
\begin{eqnarray*}
SSD_{model} = \sum\limits_{i=1}^{n} (\hat{y}_i - \bar{y})^2,\,\,\,\, 
SSD_{residual} = \sum\limits_{i=1}^{n} (y_i - \hat{y}_i)^2.
\end{eqnarray*}

\item  构造检验统计量，并分析其分布，以计算概率，
\begin{eqnarray*}
F= \frac{SSD_{model}/(1)}{SSD_{residual}/(n-2) }.
\end{eqnarray*}

\end{enumerate}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.5.2. 心室收缩速度数据的方差分析 }

\begin{itemize}
\item  {\color{red}问题：用方差分析研究血糖含量对心室收缩速度的影响。}

\item 解答：将线性回归模型的结果输入 \,{\color{blue}\texttt{anova()}} 函数。检验的 $p$ 值为 0.0479, 在 0.05 的显著性水平下，血糖含量对心室收缩速度有显著影响。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(thuesen)
> lm01 <- lm(short.velocity~blood.glucose)
> summary(lm01)
> anova(lm01)
Analysis of Variance Table

Response: short.velocity
              Df  Sum Sq  Mean Sq F value Pr(>F)  
blood.glucose  1 0.20727 0.207269   4.414 0.0479 *
Residuals     21 0.98610 0.046957                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.1. Exercise. }

\begin{itemize}

\item  %7.1 
The \,{\color{blue}\texttt{zelazo}} data are in the form of a list of vectors, one for each of the four groups. 
%\begin{enumerate}
%\item  
Convert the data to a form suitable for the use of \,{\color{blue}\texttt{lm()}}, and calculate the relevant test. 
%\item  
Consider \,{\color{blue}\texttt{t}} tests comparing selected subgroups or obtained by combining groups.
%\end{enumerate}

\item  Let $\alpha=0.10$. Which statement is incorrect?
\begin{enumerate}[(a)]
\item  The true difference in means for the active training group and the rest is not equal to zero. 
\item  The true difference in means for the active training group and the control group is not equal to zero. 
\item  The true difference in means for the active training group and the passive training group is not equal to zero. 
\item  The true difference in means for the active training group and the no-training group is not equal to zero. 
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.1. Exercise - Answer. }

\begin{itemize}

\item  %7.1 
(c). The p-value is 0.2301 in this test. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
walk <- unlist(zelazo)
group <- factor(rep(1:4,c(6,6,6,5)), labels=names(zelazo))
summary(lm(walk ~ group))
t.test(zelazo$active,unlist(zelazo[-1])) # active vs. rest
t.test(zelazo$active,zelazo$ctr.8w) # active vs. control
t.test(zelazo$active,zelazo$passive) # active vs. passive
t.test(zelazo$active,zelazo$none) # active vs. none

\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.2. Exercise. }

\begin{itemize}

\item  %7.2 
In the \,{\color{blue}\texttt{lung}} data, do the three measurement methods give systematically different results? If so, which ones appear to be different?

\item  Let $\alpha=0.05$. Which statement is incorrect?
\begin{enumerate}[(a)]
\item  Method A and Method B give significantly different results. 
\item  Method A and Method C give significantly different results. 
\item  There is a significant difference between at least two Methods. 
\item  There is a significant difference between at least two Subjects. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.2. Exercise - Answer. }

\begin{itemize}

\item  %7.2 
(a). The p-value is 0.11975 for the test of the difference between Method B and Method A. Thus it is not significant. 
%A and C differ with B intermediate, not significantly different from either. (The B–C comparison is not available from the summary, but due to the balanced design, the standard error of that difference is 0.16656 like the two others.)

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> fit <- lm(volume~method+subject, data=lung)
> summary(fit)
...
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.17222    0.19232  16.494  1.4e-08 ***
methodB      0.28333    0.16656   1.701  0.11975    
methodC      0.60000    0.16656   3.602  0.00483 ** 
...
> anova(fit)
Analysis of Variance Table
Response: volume
          Df  Sum Sq Mean Sq F value  Pr(>F)  
method     2 1.08111 0.54056  6.4953 0.01557 *
subject    5 2.18278 0.43656  5.2457 0.01271 *
Residuals 10 0.83222 0.08322   
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.3. Exercise. }

\begin{itemize}

\item  %7.3 
Repeat the previous exercises using the \,{\color{blue}\texttt{zelazo}} and \,{\color{blue}\texttt{lung}} data with the relevant nonparametric tests.

%Do the analysis of variance for the \,{\color{blue}\texttt{zelazo}} data and the \,{\color{blue}\texttt{lung}} data with the relevant nonparametric tests. 

\item  Let $\alpha=0.05$. Which statement is incorrect?
\begin{enumerate}[(a)]
\item  The Kruskal-Wallis rank sum test shows there is no significant difference in the age to walk between the groups. %p-value = 0.0758
\item  The Wilcoxon rank sum test shows there is a significant difference in the age to walk between the active group and the control group. %p-value = 0.03493
\item  The Wilcoxon rank sum test shows there is a significant difference in the age to walk between the active group and the remaining groups combined. %p-value = 0.02224
\item  The Wilcoxon signed rank test shows there is a significant difference in the lung volume between method A and method C. %p-value = 0.05906
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.3. Exercise - Answer. }

\begin{itemize}

\item  %7.3 
(d). This test shows a p-value of 0.05906. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
walk <- unlist(zelazo)
group <- factor(rep(1:4,c(6,6,6,5)), labels=names(zelazo))

kruskal.test(walk ~ group)
wilcox.test(zelazo$active,zelazo$ctr.8w) # first vs. last
wilcox.test(zelazo$active,unlist(zelazo[-1])) # first vs. rest
#friedman.test(volume ~ method | subject, data=lung)
wilcox.test(lung$volume[lung$method=="A"],
            lung$volume[lung$method=="C"], paired=TRUE) # etc.
\end{lstlisting} 
            
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.4. Exercise. }

\begin{itemize}

\item  %7.4 
The \,{\color{blue}\texttt{igf1}} variable in the \,{\color{blue}\texttt{juul}} data set is arguably skewed and has different variances across Tanner groups. Try to compensate for this using logarithmic and square-root transformations, and use the Welch test. However, the analysis is still problematic - why?

\item  Let $\alpha=0.05$. Which statement is incorrect?
\begin{enumerate}[(a)]
\item  The Welch one-way test shows there is a significant difference in the means of sqrt(igf1) for different Tanner groups. 
\item  The Welch one-way test does not assume equal variances. 
\item  The pairwise t tests show there is a significant difference in the means of sqrt(igf1) for the Tanner 3 group and the Tanner 5 group. 
\item  The pairwise t tests show there is a significant difference in the means of sqrt(igf1) for the Tanner 4 group and the Tanner 5 group. 
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.6.4. Exercise - Answer. }

\begin{itemize}

\item  %7.4 
(c). The p-value of this test is 0.5005.

Only the square-root transform is shown; you can do the same for log-transformed and untransformed data. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
attach(juul)
hist(igf1); hist(log(igf1)); hist(sqrt(igf1))
tapply(sqrt(igf1),tanner, sd, na.rm=TRUE)
plot(sqrt(igf1)~jitter(tanner))
oneway.test(sqrt(igf1)~tanner)
pairwise.t.test(sqrt(igf1),tanner, pool.sd = F)
\end{lstlisting}

\item  
The square root looks nice, logarithms become skewed in the opposite direction. The transformations do not make much of a difference for the test. It is, however, a problem that strong age effects are being ignored, particularly within Tanner stage 1.


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.1. 单项选择题 }

\begin{itemize}

\item %1
载入程序包 \,{\color{blue}\texttt{ISwR}}, 载入数据 \,{\color{blue}\texttt{zelazo}}. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> library(ISwR)  #1 
> zelazo  #2 
> ?zelazo  #3 
> walk <- unlist(zelazo)  #4 
> group <- factor(rep(1:4,c(6,6,6,5)),labels=names(zelazo))  #5 
> mydata <- data.frame(walk=walk,group=group)  #6 
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.1. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  这个数据 \,{\color{blue}\texttt{zelazo}} 是一个列表，包含了四组婴儿走路的年龄（月份）。
\item  第6行命令得到一个数据框 \,{\color{blue}\texttt{mydata}}, 包含23行，两个变量分别是走路年龄的数值型数据和表示组别的因子型数据。 
\item   第一组是测试组，接受积极的走路训练。
\item  第二组是消极训练组，不接受积极的运动训练。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.1. 单项选择题 }

\begin{itemize}

\item  解答：(d).
第二组是消极训练组，接受同第一组一样的 social and gross motor stimulation 训练，但没接受走路和位置训练。
参考课文270页的数据说明，或者阅读数据的帮助页面。


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.2. 单项选择题 }

\begin{itemize}

\item %2
研究 \,{\color{blue}\texttt{zelazo}} 数据，计算组内方差、组间方差和全局方差。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> x1 <- zelazo$active
> x2 <- zelazo$passive
> x3 <- zelazo$none
> x4 <- zelazo$ctr.8w
> x <- c(x1,x2,x3,x4)
> x1bar <- mean(x1)
> x2bar <- mean(x2)
> x3bar <- mean(x3)
> x4bar <- mean(x4)
> xbar <- mean(x)
> n1 <- length(x1)
> n2 <- length(x2)
> n3 <- length(x3)
> n4 <- length(x4)
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.2. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  数据 \,{\color{blue}\texttt{zelazo}} 的组内方差 $\text{SSD}_w$ 是 43.69. 
\item  数据 \,{\color{blue}\texttt{zelazo}} 的组间方差 $\text{SSD}_b$ 是 18.47. 
\item  全局方差总是组内方差与组间方差的和。
\item  如果组间方差远远大于组内方差，那么认为分组数据的均值是有显著差异的。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.2. 单项选择题 }

\begin{itemize}

\item  解答：(b).
经过下述计算，组间方差是 14.78.
\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> SSDw <- sum((x1-x1bar)^2)+sum((x2-x2bar)^2)+sum((x3-x3bar)^2)+
    sum((x4-x4bar)^2)
> SSDb <- n1*(x1bar-xbar)^2+n2*(x2bar-xbar)^2+n3*(x3bar-xbar)^2+
    n4*(x4bar-xbar)^2
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.3. 单项选择题 }

\begin{itemize}

\item %3
研究 \,{\color{blue}\texttt{zelazo}} 数据，计算平均组内方差 $\text{MS}_w$、平均组间方差 $\text{MS}_b$ 和 $F$ 统计量。

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  组内方差的自由度是 $N-k=23-4=19$, 平均组内方差是 2.299.
\item  组间方差的自由度是 $k-1=4-1=3$, 平均组间方差是 4.926.
\item  $F$ 统计量是平均组内方差除以平均组间方差，在数据与分组无关的零假设下，服从自由度为 $(k-1,N-k)$ 的 $F$ 分布。
\item  $F$ 统计量的统计值为 2.142, $F$ 统计量大于这个统计值的概率为 0.1285.
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.3. 单项选择题 }

\begin{itemize}


\item  解答：(c).
倒过来了，$F$ 统计量应该是平均组间方差除以平均组内方差。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> N=23
> k=4
> MSw <- SSDw/(N-k)
> MSb <- SSDb/(k-1)
> myf <- MSb/MSw
> 1-pf(myf,3,19)
[1] 0.1285456
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.4. 单项选择题 }

\begin{itemize}

\item %4
继续研究 \,{\color{blue}\texttt{zelazo}} 数据，使用对分组数据的线性回归。设显著性水平 $\alpha=0.05$. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm01 <- lm(walk~group,data=mydata)
> summary(lm01)
> anova(lm01)
\end{lstlisting}


\item  下述说法中，不正确的是哪个？
\begin{enumerate}[(a)]
\item  这时的自变量是一个属性变量，它的数据类型是因子型的。
\item  方差分析表的 $p$ 值是 $0.1285$, 说明组间差异不显著。
\item  第四组与第一组的均值差为 2.225, 而且这个差异是显著的。
\item  这个线性模型的截距项是总体均值。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.4. 单项选择题 }

\begin{itemize}

\item  解答：(d).
这个线性模型的截距项是第一组的均值。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm01 <- lm(walk~group,data=mydata)
> summary(lm01)

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   10.1250     0.6191  16.355 1.19e-12 ***
grouppassive   1.2500     0.8755   1.428   0.1696    
groupnone      1.5833     0.8755   1.809   0.0864 .  
groupctr.8w    2.2250     0.9182   2.423   0.0255 *  

> anova(lm01)
Analysis of Variance Table

Response: walk
          Df Sum Sq Mean Sq F value Pr(>F)
group      3 14.778  4.9259  2.1422 0.1285
Residuals 19 43.690  2.2995 
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.5. 单项选择题 }

\begin{itemize}

\item %5
研究 \,{\color{blue}\texttt{zelazo}} 数据，不同组的走路年龄的均值是否存在差异。
%P.R. Zelazo, N.A. Zelazo, and S. Kolb (1972), “Walking” in the newborn, Science, 176: 314–315.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> pairwise.t.test(walk,group)
> pairwise.t.test(walk,group,p.adj='bonferroni')
> t.test(zelazo$active,zelazo$ctr.8w)
> oneway.test(walk~group)
> bartlett.test(walk~group)
> kruskal.test(walk~group)
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.5. 单项选择题 }

\begin{itemize}

\item  设显著性水平 $\alpha=0.05$. 下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  Bonferroni 修正方法试图解决多次检验的时候更容易出现小概率事件的问题。
\item  Welch 方法可以检验方差不相等时的均值差异。这里的函数 oneway.test() 的检验结果无法拒绝无差异的零假设。
\item  Bartlett 检验的零假设是不同组的方差是相等的。这里检验结果是不同组的方差有显著差异。
\item  Kruskal-Wallis 检验是方差分析的非参数版本，数据被替换为不考虑分组的秩。这里检验结果是组间没有显著差异。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.5. 单项选择题 }

\begin{itemize}

\item  解答：(c).
这里检验结果是不同组的方差没有显著差异。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> bartlett.test(walk~group)

	Bartlett test of homogeneity of variances

data:  walk by group
Bartlett K-squared = 1.7037, df = 3, p-value = 0.6361
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.6. 单项选择题 }

\begin{itemize}

\item %6
继续研究 \,{\color{blue}\texttt{zelazo}} 数据，用条形图画出分组数据，然后对每组数据，叠加均值和标准误的值。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> walkbar <- tapply(walk,group,mean)  #1 
> walksd <- tapply(walk,group,sd)  #2 
> walkn <- tapply(walk,group,length)  #3 
> sem <- walksd/sqrt(walkn)  #4 
> stripchart(walk~group,method='jitter',jitter=0.05,pch=16,vert=T)  #5 
> arrows(1:4,walkbar+sem,1:4,walkbar-sem,angle=45,code=3,length=0.1)  #6 
> lines(1:4,walkbar,pch=1,type='b',cex=1)  #7 
\end{lstlisting}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.6. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  程序的前三行分别计算了各组数据的均值、标准差和样本容量。
\item  程序第四行计算了每组数据的均值的标准差，即 $\hat{\sigma}/\sqrt{n}$. 
\item  程序第五行画出了条形图，这是一种分组数据的散点图。横坐标是组别，纵坐标是数据的数值大小。
\item  程序第六行画出了每组数据的均值加减数据的一个标准差所形成的区间。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.6. 单项选择题 }

\begin{itemize}

\item  解答：(d).
程序第六行画出了每组数据的均值加减一个标准误（即均值的标准差 $\hat{\sigma}/\sqrt{n}$）所形成的区间。

%\begin{center}
\begin{figure}[ht!]\centering
\includegraphics[height=0.5\textheight, width=0.8\textwidth]{ex-7-6-stripchart.png}
\caption{分组数据的条形图}
\end{figure}
%\end{center}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.7. 单项选择题 }

\begin{itemize}

\item %7
数据框 \,{\color{blue}\texttt{lung}} 包含了分别用三种方法测得的6位病人的肺活量。
测量方法和不同病人看做是影响肺活量的两个因素。设显著性水平 $\alpha=0.05$. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lung
> ?lung
> lm07<-lm(volume~method+subject,data=lung)
> summary(lm07)
\end{lstlisting}


\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  测量方法A与B的结果没有显著不同。
\item  测量方法A与C的结果有显著不同。
\item  第一个病人与第二个病人的肺活量有显著的不同。
\item  第一个病人与第三个病人的肺活量有显著的不同。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.7. 单项选择题 }

\begin{itemize}

\item  解答：(d).
第一个病人与第三个病人的肺活量的均值差的检验，$p$ 值等于0.68, 因此没有显著的不同。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm07 <- lm(volume~method+subject,data=lung)
> summary(lm07)
...
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.17222    0.19232  16.494  1.4e-08 ***
methodB      0.28333    0.16656   1.701  0.11975    
methodC      0.60000    0.16656   3.602  0.00483 ** 
subject2    -0.83333    0.23555  -3.538  0.00538 ** 
subject3     0.10000    0.23555   0.425  0.68016    
subject4    -0.06667    0.23555  -0.283  0.78293    
subject5    -0.03333    0.23555  -0.142  0.89027    
subject6    -0.60000    0.23555  -2.547  0.02900 *  
...
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.8. 单项选择题 }

\begin{itemize}

\item %8
研究数据框 \,{\color{blue}\texttt{lung}}, 下述程序画出了双因素的肺活量数据的交互图。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(lung)
> interaction.plot(method,subject,volume)
\end{lstlisting}


\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  这个图的横坐标是三种测量方法。
\item  这个图的纵坐标是肺活量。
\item  这个图的每条折线代表一个病人。
\item  测量方法A的结果普遍比测量方法C的结果高。
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.8. 单项选择题 }

\begin{itemize}

\item  解答：(d).
从图中明显看出，测量方法A的结果普遍比测量方法C的结果低。
%\begin{center}
\begin{figure}[ht!]\centering
\includegraphics[height=0.5\textheight, width=0.8\textwidth]{ex-7-8-lung-volume.png}
\caption{双因素数据的交互图}
\end{figure}
%\end{center}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.9. 单项选择题 }

\begin{itemize}

\item %9
继续研究数据框 \,{\color{blue}\texttt{lung}}, 将数据组织成如下表格 $x_{ij},1\le i\le m, 1\le j\le n$, 其中 $m=6,n=3$. 
考虑统计模型 $X_{ij}=\mu + \alpha_i +\beta_j + \varepsilon_{ij}, \,\, \varepsilon_{ij}\sim N(0,\sigma^2)$. 

{\footnotesize 
\begin{table}[ht]\centering
\caption{肺活量看作是一个双因素的数据}\vspace{0.2cm}
\begin{tabular}{|c|c|c|c|} \hline
subject  & method A & method B & method C \\ \hline 
1 & 3.3 & 3.1 & 4.0  \\ \hline 
2 & 2.5 & 2.6 & 2.8  \\ \hline 
3 & 3.1 & 3.5 & 4.1  \\ \hline 
4 & 3.0 & 3.7 & 3.5  \\ \hline 
5 & 2.8 & 3.6 & 3.9  \\ \hline 
6 & 2.9 & 2.8 & 2.9  \\ \hline 
\end{tabular}
\end{table}
}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.9. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  这个数据的行间方差是 $\text{SSD}_r=2.183$.
\item  这个数据的列间方差是 $\text{SSD}_c=1.081$.
\item  这个数据的总方差是 $\text{SSD}_t=4.096$.
\item  这个数据的残差方差是 $\text{SSD}_{res}=0.238$.
\end{enumerate}


\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.9. 单项选择题 }

\begin{itemize}

\item  解答：(d).
这个数据的残差方差是 $\text{SSD}_{res}=0.832$.

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> attach(lung)
> x <- lung$volume
> xbar <- mean(x)
> xidotbar <- tapply(volume,subject,mean) #每行的平均值
> xdotjbar <- tapply(volume,method,mean) #每列的平均值
> m <- 6 #行数
> n <- 3 #列数
> SSDr <- n*sum((xidotbar-xbar)^2) #行间方差
> SSDc <- m*sum((xdotjbar-xbar)^2) #列间方差
> SSDt <- sum((x-xbar)^2) #总方差
> SSDres <- SSDt-SSDr-SSDc #按公式计算残差方差
> xi <- rep(xidotbar,each=3) 
> xj <- rep(xdotjbar,6)
> mydata <- lung
> mydata$xi <- xi
> mydata$xj <- xj
> SSDres02 <- sum((x-xi-xj+xbar)^2) #按定义计算残差方差
\end{lstlisting}



\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.10. 单项选择题 }

\begin{itemize}

\item %10
研究数据框 \,{\color{blue}\texttt{lung}}, 分别使用参数模型和非参数模型，进行双因素方差分析。设显著性水平 $\alpha=0.05$. 

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> lm07<-lm(volume ~ method + subject)
> summary(lm07)
> anova(lm07)
> friedman.test(volume ~ subject | method)
> friedman.test(volume ~ method | subject)
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.10. 单项选择题 }

\begin{itemize}

\item  下述说法中，不正确的是哪个？

\begin{enumerate}[(a)]
\item  参数方法检验，数据的行效应显著。
\item  参数方法检验，数据的列效应显著。
\item  非参数方法检验，数据的行效应显著。
\item  非参数方法检验，数据的列效应不显著。
\end{enumerate}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.7.10. 单项选择题 }

\begin{itemize}

\item  解答：(c).
非参数方法检验，数据的行效应不显著。检验的 $p$ 值为 0.06876, 略大于显著性水平。
下述代码是使用参数方法检验，直接调用anova函数以及从定义直接计算 $F$ 统计值和 $p$ 值。

\lstset{basicstyle=\scriptsize}
\begin{lstlisting}[language=R]
> anova(lm07)
Analysis of Variance Table

Response: volume
          Df  Sum Sq Mean Sq F value  Pr(>F)  
method     2 1.08111 0.54056  6.4953 0.01557 *
subject    5 2.18278 0.43656  5.2457 0.01271 *
Residuals 10 0.83222 0.08322  

> myfr <- SSDr/(m-1)/SSDres*(m-1)*(n-1)
> 1-pf(myfr,m-1,(m-1)*(n-1))
[1] 0.01271169
> myfc <- SSDc/(n-1)/SSDres*(m-1)*(n-1)
> 1-pf(myfc,n-1,(m-1)*(n-1))
[1] 0.01556838
\end{lstlisting}

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.1. 简答题 }

\begin{enumerate}

\item  （单因素方差分析）设 $x_{ij}$ 表示第 $i$ 组的第 $j$ 个观测值，其中 $1\le i\le m$, $1\le j\le n$.  记 $N=mn$. 记 $\bar{x}_{i}$ 为第 $i$ 组的均值，$\bar{x}$ 为全局均值。

\begin{enumerate}
\item  写出组内方差 $\text{SSD}_W$、组间方差 $\text{SSD}_B$ 和全局方差 $\text{SSD}_{total}$ 的计算公式。

\item  证明分解公式：$ \text{SSD}_B+\text{SSD}_W = \text{SSD}_{total}$. 

\item  为检验组间差异是否显著，构造如下统计量，解释该假设检验的推断过程。
\[ F= \frac{\text{MS}_B}{\text{MS}_W} = \frac{\text{SSD}_B/(m-1)}{\text{SSD}_W/(N-m)}. \]

\item  解释方差分析表的每一项的计算过程。

\end{enumerate}
\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.2. 简答题 }

\begin{enumerate}\setcounter{enumi}{1}

\item  （双因素方差分析）设 $x_{ij}$ 是一个 $m\times n$ 表的第 $i$ 行和第 $j$ 列的观测值。

\begin{enumerate}
\item  写出行间方差 $\text{SSD}_{row}$、列间方差 $\text{SSD}_{column}$、和残差方差 $\text{SSD}_{res}$ 的计算公式。

\item  这些平方和的自由度分别是多少？

\item  证明分解公式：$ \text{SSD}_{row}+\text{SSD}_{column} + \text{SSD}_{res} = \text{SSD}_{total}$. 

\item  构造统计量检验行间差异和列间差异是否显著。解释推断过程。

\end{enumerate}
\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.3. 简答题 }

\begin{enumerate}\setcounter{enumi}{2}

\item  研究 ISwR 程序包里的 thuesen 数据框。以 short.velocity 为应变量，以 blood.glucose 为自变量，进行回归分析和方差分析，解释方差分析表的各项结论。对该回归模型是否显著作出推断。


\item  研究 ISwR 程序包里的 lung 数据。对六个人分别用三种方法测量肺活量。三种测量方法的结果是否显著不同？哪个组的数据显著不同？使用参数方法和非参数方法分别进行检验。
%In the lung data, do the three measurement methods give systematically different results? If so, which ones appear to be different?
%Repeat the previous exercises using the zelazo and lung data with the relevant nonparametric tests.


\item  研究 ISwR 程序包里的 coking 数据，描述了不同炉宽和不同炉温的条件下，从煤炭炼制焦炭所需要的时间。
使用双因素方差分析，研究炉温和炉宽这两个因素对炼焦时间的作用是否显著。


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.6. 简答题 }

\begin{enumerate}\setcounter{enumi}{5}


\item  研究 ISwR 程序包里的 red.cell.folate 数据框，其中的 folate 数据是按 ventilation 分组的。
\begin{enumerate}
\item  用 lm() 函数对这个分组数据进行回归分析，解释得到的回归系数的含义。
\item 用 anova() 函数进行方差分析，判断组间差异是否显著。%求出 $F$ 统计值和检验的 $p$ 值。
\item 用 pairwise.t.test() 函数对这个分组数据进行成对比较。
\item 用 stripchart() 函数来画图展示这个分组数据。
\item 用 kruskal.test() 函数对这个分组数据进行 Kruskal-Wallis 的秩和检验。
\item  研究 Kruskal-Wallis 检验的统计量和推断原理。
\end{enumerate}


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.7. 简答题 }

\begin{enumerate}\setcounter{enumi}{6}

\item  研究 ISwR 程序包里的 heart.rate 数据框。
\begin{enumerate}
\item 解释变量 \verb+hr+, \verb+subj+ 和 \verb+time+ 的含义。
\item 用 gl() 函数为这个数据框背后的平衡试验生成因子型分类数据。
\item 用 lm() 函数和anova() 函数进行双因素方差分析，检验不同的病人和不同的时间这两个因素对心率的影响。
\item 用 interaction.plot() 函数画出这些数据的意大利面图。
\item 试用 Friedman 检验推断不同时间对心率的影响。
\item 研究 Friedman 检验的统计量和推断原理。
\end{enumerate}



\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.8. 简答题 }

\begin{enumerate}\setcounter{enumi}{7}


\item  研究 ISwR 程序包里的 juul 数据。其中的 igf1 数据是按 tanner 分组的。
\begin{enumerate}
\item  将 tanner 数据从数值型改为因子型。
\item  用 anova() 函数进行方差分析，判断组间差异是否显著。
\item  生长因子数据 igf1 是否服从正态分布？是否左右对称？
\item  根据 tanner 的取值不同，对 igf1 进行分组，使用 bartlett.test() 函数，或直接计算每组的方差，检验各组的方差是否有显著差异。
\item  使用 Welch 的 oneway.test() 函数，检验各组的 igf1 数据的均值是否有显著差异。
\item  使用求对数或平方根的方法，对 igf1 数据进行变换，检验其均值是否有显著差异。
\item  分析 tanner 和 age 这两个因素对 igf1 数据的影响。
\end{enumerate}
%The igf1 variable in the juul data set is arguably skewed and has different variances across Tanner groups. Try to compensate for this using logarithmic and square-root transformations, and use the Welch test. However, the analysis is still problematic — why?


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{7.8.9. 简答题 }

\begin{enumerate}\setcounter{enumi}{8}


\item  研究 ISwR 程序包里的 zelazo 数据。
\begin{enumerate}
\item  将这个分组数据保存为一个数据框。
\item  使用 lm() 函数进行回归分析。
\item  使用 t.test() 函数检验第一组和第四组的均值是否有显著差异。
\item  使用 t.test() 函数检验第一组和其余组的合并组的均值是否有显著差异。
\item  使用非参数方法进行均值差的检验。
\end{enumerate}
%The zelazo data are in the form of a list of vectors, one for each of the four groups. Convert the data to a form suitable for the use of lm, and calculate the relevant test. Consider t tests comparing selected subgroups or obtained by combining groups.


\end{enumerate}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}
